
# Launch Qwen1.5 7B.
#
# run_pretrain_megatron_qwen.sh is driven purely by positional arguments, so
# the argument list is assembled below in small labelled groups and then
# forwarded, in the same order, via "$@". Only POSIX `set --` is used (no bash
# arrays), so this file behaves the same whichever shell interprets it.
#
# NOTE(review): the group labels are the reviewer's reading of the values;
# the authoritative meaning of each position is defined by
# run_pretrain_megatron_qwen.sh — confirm there before changing anything.

# 1-3: presumably run-environment selector and a relative root path (TODO
#      confirm), followed by the model size named in the heading.
set -- dsw ../../ 7B

# 4-5: presumably per-device batch size and global batch size — TODO confirm.
set -- "$@" 8 512

# 6-7: presumably learning rate and minimum learning rate — TODO confirm.
set -- "$@" 5e-5 1e-6

# 8-10: presumably sequence length, padding length and extra vocabulary
#       size — TODO confirm.
set -- "$@" 1024 1024 293

# 11: presumably numeric precision — TODO confirm.
set -- "$@" bf16

# 12-13: presumably tensor- and pipeline-parallel degree; the values agree
#        with the "tp2-pp2" suffix of the checkpoint paths below — TODO confirm.
set -- "$@" 2 2

# 14-18: a mode keyword followed by four on/off switches whose meaning is
#        defined by the called script.
set -- "$@" sel true false false false

# 19: presumably checkpoint save interval — TODO confirm.
set -- "$@" 20000000

# 20-21: dataset path and Megatron-format checkpoint path.
set -- "$@" \
  /gemini/platform/public/datasets/wudao_test/bin_data/wudao_qwenbpe_content_document \
  /gemini/platform/public/llm/huggingface/Qwen/Qwen1.5-7B-Chat-megatron-tp2-pp2

# 22-23: presumably training-token budget and warmup-token count — TODO confirm.
set -- "$@" 19660395 0

# 24: presumably the output directory for this run — TODO confirm.
set -- "$@" \
  /gemini/platform/public/users/daoguang/megatron_ckpts/Qwen1.5-7B-megatron-tp2-pp2-24.4.24-debug

# Hand the assembled list to the pretraining script, which is resolved
# relative to the current working directory.
bash run_pretrain_megatron_qwen.sh "$@"


# launch Qwen1.5 0.5B (disabled: every line of this command is commented out; uncomment the block to run it)
# bash run_pretrain_megatron_qwen.sh  \
# dsw  \
# ../../ \
# 0.5B   \
# 1    \
# 8 \
# 1e-5   \
# 1e-6   \
# 128  \
# 128  \
# 293   \
# bf16  \
# 1   \
# 1  \
# sel  \
# true   \
# true  \
# true   \
# false   \
# 100000  \
# /gemini/platform/public/datasets/wudao_test/bin_data/wudao_qwenbpe_content_document  \
# /gemini/platform/public/llm/huggingface/Qwen/Qwen1.5-0.5B-megatron-tp1-pp1  \
# 100000000   \
# 10000   \
# /gemini/platform/public/users/daoguang/megatron_ckpts/Qwen1.5-0.5B-megatron-tp1-pp1-24.4.24-debug